{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Quasi-Poisson Regression Part 2"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# yahoo finance is used to fetch data \n",
        "import yfinance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-07-24T05:19:58.738Z",
          "iopub.execute_input": "2021-07-24T05:19:58.741Z",
          "iopub.status.idle": "2021-07-24T05:19:59.173Z",
          "shell.execute_reply": "2021-07-24T05:19:59.192Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input\n",
        "symbol = 'AMD'\n",
        "start = '2014-01-01'\n",
        "end = '2018-08-27'\n",
        "\n",
        "# Read data \n",
        "dataset = yf.download(symbol,start,end)\n",
        "\n",
        "# View Columns\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 completed\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/plain": "            Open  High   Low  Close  Adj Close    Volume\nDate                                                    \n2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700",
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Open</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Close</th>\n      <th>Adj Close</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.85</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>3.98</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.01</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.19</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.23</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2021-07-24T05:19:59.180Z",
          "iopub.execute_input": "2021-07-24T05:19:59.185Z",
          "iopub.status.idle": "2021-07-24T05:19:59.827Z",
          "shell.execute_reply": "2021-07-24T05:19:59.822Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset['Increase_Decrease'] = np.where(dataset['Volume'].shift(-1) > dataset['Volume'],1,0)\n",
        "dataset['Buy_Sell_on_Open'] = np.where(dataset['Open'].shift(-1) > dataset['Open'],1,0)\n",
        "dataset['Buy_Sell'] = np.where(dataset['Adj Close'].shift(-1) > dataset['Adj Close'],1,0)\n",
        "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
        "dataset = dataset.dropna()"
      ],
      "outputs": [],
      "execution_count": 3,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-07-24T05:19:59.835Z",
          "iopub.execute_input": "2021-07-24T05:19:59.839Z",
          "shell.execute_reply": "2021-07-24T05:19:59.867Z",
          "iopub.status.idle": "2021-07-24T05:19:59.852Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from statsmodels.genmod.generalized_estimating_equations import GEE\n",
        "from statsmodels.genmod.cov_struct import (Exchangeable,\n",
        "    Independence,Autoregressive)\n",
        "from statsmodels.genmod.families import Poisson"
      ],
      "outputs": [],
      "execution_count": 4,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-07-24T05:19:59.858Z",
          "iopub.execute_input": "2021-07-24T05:19:59.861Z",
          "iopub.status.idle": "2021-07-24T05:20:00.161Z",
          "shell.execute_reply": "2021-07-24T05:20:00.179Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "fam = Poisson()\n",
        "ind = Independence()\n",
        "model1 = GEE.from_formula(\"Increase_Decrease ~ Returns + Buy_Sell_on_Open + Open\", 'Buy_Sell', dataset, cov_struct=ind, family=fam)\n",
        "result1 = model1.fit()\n",
        "print(result1.summary())"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "                               GEE Regression Results                              \n",
            "===================================================================================\n",
            "Dep. Variable:           Increase_Decrease   No. Observations:                 1170\n",
            "Model:                                 GEE   No. clusters:                        2\n",
            "Method:                        Generalized   Min. cluster size:                 584\n",
            "                      Estimating Equations   Max. cluster size:                 586\n",
            "Family:                            Poisson   Mean cluster size:               585.0\n",
            "Dependence structure:         Independence   Num. iterations:                     2\n",
            "Date:                     Fri, 23 Jul 2021   Scale:                           1.000\n",
            "Covariance type:                    robust   Time:                         22:20:00\n",
            "====================================================================================\n",
            "                       coef    std err          z      P>|z|      [0.025      0.975]\n",
            "------------------------------------------------------------------------------------\n",
            "Intercept           -0.7826      0.017    -45.953      0.000      -0.816      -0.749\n",
            "Returns              0.9742      1.267      0.769      0.442      -1.508       3.457\n",
            "Buy_Sell_on_Open    -0.0671      0.172     -0.390      0.696      -0.404       0.270\n",
            "Open                 0.0036      0.003      1.180      0.238      -0.002       0.010\n",
            "==============================================================================\n",
            "Skew:                          0.1802   Kurtosis:                      -1.9614\n",
            "Centered skew:                 0.1789   Centered kurtosis:             -1.9459\n",
            "==============================================================================\n"
          ]
        }
      ],
      "execution_count": 5,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-07-24T05:20:00.168Z",
          "iopub.execute_input": "2021-07-24T05:20:00.172Z",
          "iopub.status.idle": "2021-07-24T05:20:00.189Z",
          "shell.execute_reply": "2021-07-24T05:20:00.231Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "X = dataset[[ 'Returns', 'Buy_Sell_on_Open', 'Open', 'Buy_Sell']]\n",
        "y = dataset['Increase_Decrease']"
      ],
      "outputs": [],
      "execution_count": 6,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-07-24T05:20:00.195Z",
          "iopub.execute_input": "2021-07-24T05:20:00.198Z",
          "iopub.status.idle": "2021-07-24T05:20:00.206Z",
          "shell.execute_reply": "2021-07-24T05:20:00.235Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, \n",
        "                                   test_size=0.3, random_state=101)"
      ],
      "outputs": [],
      "execution_count": 7,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-07-24T05:20:00.214Z",
          "iopub.execute_input": "2021-07-24T05:20:00.220Z",
          "iopub.status.idle": "2021-07-24T05:20:00.300Z",
          "shell.execute_reply": "2021-07-24T05:20:00.293Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "y_pred = result1.predict(X_test)"
      ],
      "outputs": [],
      "execution_count": 8,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-07-24T05:20:00.305Z",
          "iopub.execute_input": "2021-07-24T05:20:00.308Z",
          "iopub.status.idle": "2021-07-24T05:20:00.315Z",
          "shell.execute_reply": "2021-07-24T05:20:00.339Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn import metrics\n",
        "print('MAE:', metrics.mean_absolute_error(y_test, y_pred))\n",
        "print('MSE:', metrics.mean_squared_error(y_test, y_pred))\n",
        "print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "MAE: 0.4931482836890034\n",
            "MSE: 0.24554553581305322\n",
            "RMSE: 0.4955255147952053\n"
          ]
        }
      ],
      "execution_count": 9,
      "metadata": {
        "collapsed": true,
        "jupyter": {
          "source_hidden": false,
          "outputs_hidden": false
        },
        "nteract": {
          "transient": {
            "deleting": false
          }
        },
        "execution": {
          "iopub.status.busy": "2021-07-24T05:20:00.321Z",
          "iopub.execute_input": "2021-07-24T05:20:00.324Z",
          "iopub.status.idle": "2021-07-24T05:20:00.332Z",
          "shell.execute_reply": "2021-07-24T05:20:00.342Z"
        }
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.6.12",
      "mimetype": "text/x-python",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "pygments_lexer": "ipython3",
      "nbconvert_exporter": "python",
      "file_extension": ".py"
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "nteract": {
      "version": "0.28.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}